#!/bin/bash
# Handles parallel execution for both GPU-based models (Qwen, Llama) and CPU-based models (GPT).
# Pick ONE script to run by uncommenting one of the SCRIPT_PATH lines below.
# Absolute directory of this launcher; every path below is built from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

# --- Script selection: keep exactly ONE SCRIPT_PATH assignment active ---
# GPU models:
# SCRIPT_PATH="$SCRIPT_DIR/../eval_scripts_other_models/lambda_qwen/qwen_static4.py"
SCRIPT_PATH="$SCRIPT_DIR/../eval_scripts_other_models/lambda_qwen/llama_static4.py"
# CPU/API models:
# SCRIPT_PATH="$SCRIPT_DIR/gpt_static_thisruns.py"

# --- Universal Config ---
# Number of dataset items; jobs are handed index ranges within 0..TOTAL_DATA_SIZE-1.
TOTAL_DATA_SIZE=219
SIMILARITY_JSON="$SCRIPT_DIR/similaritylist.json"
# Results land in results_<script file name without .py>, next to this launcher.
script_file="${SCRIPT_PATH##*/}"
OUTPUT_DIR_NAME="results_${script_file%.py}"
OUTPUT_DIR="$SCRIPT_DIR/$OUTPUT_DIR_NAME"
# -p creates the output directory itself as the parent of logs/.
mkdir -p "$OUTPUT_DIR/logs"

# --- GPU Config (only for Qwen/Llama) ---
GPUS=(0 1 2 3 4 5 6 7)
MAX_JOBS_PER_GPU=1
# Upper bound on GPU jobs running at once: one slot per GPU per allowed job.
TOTAL_GPU_JOBS=$(( ${#GPUS[@]} * MAX_JOBS_PER_GPU ))

# --- CPU Config (only for GPT) ---
# Number of parallel jobs for API calls.
TOTAL_CPU_JOBS=11
# --- Slicing and Execution ---
# Splits the index range 0..TOTAL_DATA_SIZE-1 into contiguous chunks and starts
# one background Python process per chunk. Every PID is recorded so that a chunk
# exiting with a non-zero status is reported instead of going unnoticed.
# Reads:  SCRIPT_PATH, TOTAL_DATA_SIZE, TOTAL_CPU_JOBS, TOTAL_GPU_JOBS, GPUS,
#         OUTPUT_DIR, SIMILARITY_JSON
# Writes: DATA_PER_JOB, job_counter, failed_jobs, pending_pids, start_idx, end_idx

# Succeeds when the file name of the script at path $1 contains "gpt". Only the
# file name is inspected, so a parent directory that happens to contain "gpt"
# cannot route a GPU script through the API branch.
is_api_script() {
    local script_name="${1##*/}"
    [[ "$script_name" == *gpt* ]]
}

# Prints the inclusive last index of the chunk that starts at index $1 and spans
# $2 items, clamped to the last valid index of a dataset holding $3 items.
chunk_end() {
    local first=$1 size=$2 total=$3
    local last=$(( first + size - 1 ))
    if (( last >= total )); then
        last=$(( total - 1 ))
    fi
    printf '%s\n' "$last"
}

# Waits for every PID in the global array pending_pids, adds one to failed_jobs
# for each process that exits non-zero, then empties the array.
reap_pending_jobs() {
    local pid
    for pid in "${pending_pids[@]}"; do
        if ! wait "$pid"; then
            failed_jobs=$(( failed_jobs + 1 ))
            echo "Job with PID $pid exited with a non-zero status." >&2
        fi
    done
    pending_pids=()
}

job_counter=0
failed_jobs=0
pending_pids=()

if (( TOTAL_DATA_SIZE <= 0 )); then
    echo "TOTAL_DATA_SIZE is not a positive number; no jobs launched." >&2
elif [[ ! -f "$SCRIPT_PATH" ]]; then
    # Fail once here rather than once per chunk inside the per-job logs.
    echo "Evaluation script not found: $SCRIPT_PATH" >&2
    exit 1
elif is_api_script "$SCRIPT_PATH"; then
    # GPT (CPU/API) runner logic: all chunks are started at once.
    if (( TOTAL_CPU_JOBS <= 0 )); then
        echo "TOTAL_CPU_JOBS must be at least 1." >&2
        exit 1
    fi
    # Ceiling division so the chunks cover every index.
    DATA_PER_JOB=$(( (TOTAL_DATA_SIZE + TOTAL_CPU_JOBS - 1) / TOTAL_CPU_JOBS ))
    echo "Running GPT evaluation with $TOTAL_CPU_JOBS parallel jobs."
    for (( start_idx = 0; start_idx < TOTAL_DATA_SIZE; start_idx += DATA_PER_JOB )); do
        end_idx=$(chunk_end "$start_idx" "$DATA_PER_JOB" "$TOTAL_DATA_SIZE")
        echo "Launching GPT chunk $start_idx-$end_idx..."
        python3 "$SCRIPT_PATH" \
            --start "$start_idx" --end "$end_idx" \
            --output_dir "$OUTPUT_DIR" --similarity_json "$SIMILARITY_JSON" \
            > "$OUTPUT_DIR/logs/job_${start_idx}_${end_idx}.log" 2>&1 &
        pending_pids+=("$!")
    done
else
    # Qwen/Llama (GPU) runner logic: chunks are assigned to GPUS round-robin.
    if (( TOTAL_GPU_JOBS <= 0 || ${#GPUS[@]} == 0 )); then
        echo "GPUS must be non-empty and TOTAL_GPU_JOBS must be at least 1." >&2
        exit 1
    fi
    # Ceiling division so the chunks cover every index.
    DATA_PER_JOB=$(( (TOTAL_DATA_SIZE + TOTAL_GPU_JOBS - 1) / TOTAL_GPU_JOBS ))
    echo "Running GPU evaluation with $TOTAL_GPU_JOBS total jobs."
    for (( start_idx = 0; start_idx < TOTAL_DATA_SIZE; start_idx += DATA_PER_JOB )); do
        end_idx=$(chunk_end "$start_idx" "$DATA_PER_JOB" "$TOTAL_DATA_SIZE")
        gpu_idx=$(( job_counter % ${#GPUS[@]} ))
        gpu_id=${GPUS[$gpu_idx]}
        echo "Launching GPU job for indices $start_idx-$end_idx on GPU $gpu_id"
        # Only one device is exposed via CUDA_VISIBLE_DEVICES; --gpu_id 0 is
        # presumably that single visible device as seen from inside the process.
        CUDA_VISIBLE_DEVICES=$gpu_id nohup python -u "$SCRIPT_PATH" \
            --gpu_id 0 --start "$start_idx" --end "$end_idx" \
            --output_dir "$OUTPUT_DIR" --similarity_json "$SIMILARITY_JSON" \
            > "$OUTPUT_DIR/logs/job_${start_idx}_${end_idx}_gpu${gpu_id}.log" 2>&1 &
        pending_pids+=("$!")
        job_counter=$(( job_counter + 1 ))
        # Once a full batch is in flight, drain it before launching more.
        if (( job_counter % TOTAL_GPU_JOBS == 0 )); then
            echo "Waiting for batch of $TOTAL_GPU_JOBS jobs to complete..."
            reap_pending_jobs
        fi
    done
fi

# Collect the exit status of every job that has not been waited for yet.
if (( ${#pending_pids[@]} > 0 )); then
    echo "Waiting for ${#pending_pids[@]} launched job(s) to finish..."
    reap_pending_jobs
fi
if (( failed_jobs > 0 )); then
    echo "WARNING: $failed_jobs job(s) failed; check the logs in $OUTPUT_DIR/logs." >&2
fi
# --- Wait for Jobs ---
echo "Waiting for all background jobs to complete..."
wait

# --- Merge Results ---
# Concatenates the JSON arrays stored in every static_results_*.json file inside
# directory $1 and writes them to $1/merged_results.json.
# Arguments: $1 - directory holding the per-chunk result files
# Outputs:   a summary line on stdout; a warning on stderr when no file matches
# Returns:   the exit status of the Python merge (non-zero on unreadable or
#            malformed input, or when the output file cannot be written)
merge_results() {
    local results_dir=$1
    # The directory is passed as an argument and the heredoc delimiter is quoted,
    # so the path is never parsed as Python source (quotes/backslashes are safe).
    python3 - "$results_dir" <<'PY'
import glob
import json
import os
import re
import sys

out_dir = sys.argv[1]


def natural_key(path):
    # Digit runs compare as integers so a name carrying index 28 sorts before
    # one carrying 112; plain string order would put "112" first.
    parts = re.split(r'([0-9]+)', os.path.basename(path))
    return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]


pattern = os.path.join(glob.escape(out_dir), 'static_results_*.json')
chunk_files = sorted(glob.glob(pattern), key=natural_key)
if not chunk_files:
    print(f'Warning: no static_results_*.json files found in {out_dir}',
          file=sys.stderr)

results = []
for chunk_file in chunk_files:
    with open(chunk_file, 'r') as infile:
        results.extend(json.load(infile))
with open(os.path.join(out_dir, 'merged_results.json'), 'w') as outfile:
    json.dump(results, outfile, indent=2)
print(f'Merged {len(results)} records into merged_results.json')
PY
}

echo "Merging results from $OUTPUT_DIR..."
if merge_results "$OUTPUT_DIR"; then
    echo "All chunks completed and merged."
else
    # Do not claim success when the merge step itself failed.
    echo "Merging results failed; merged_results.json may be missing or incomplete." >&2
    exit 1
fi